*This file uses the IQVIA LRx Data to prepare the data used to construct the IQVIA-based instrument.

* Build the list of patients whose first appearance in the data is at age 18 or younger.
* Patient files from the AD and AA extracts are pooled and one row per PATIENT_ID is kept.

* AD patients: age at first appearance
* (PAT_FIRST/10000 presumably strips month/day from a YYYYMMDD value -- confirm)
cd `data'/AD
use AD_Patient.dta, clear
gen age_at_first = floor(PAT_FIRST/10000) - PAT_BRTH
keep if age_at_first != .
tempfile ad
save `ad', replace

* AA patients: same calculation, then stack the AD rows underneath
cd `data'/AA
use AA_Patient.dta, clear
gen age_at_first = floor(PAT_FIRST/10000) - PAT_BRTH
keep if age_at_first != .
append using `ad'

* patients first seen after age 18 are of no use for the later age filter, so drop them now
drop if age_at_first > 18

* a patient can appear in both extracts; keep a single row each
bysort PATIENT_ID: keep if _n == 1

keep PATIENT_ID PAT_BRTH_YR_NBR

* write the cohort list used by the later merges
cd `save_data'
save initial_script_1018.dta, replace

* Find each cohort patient's earliest AA activity month.
* For every yearly AA patient-activity file (2006-2018), keep only the rows whose
* PATIENT_ID is in initial_script_1018.dta, save the yearly subset, then stack the years.
* fmerge is not a built-in Stata command (presumably from the ftools package -- confirm it is installed).
forvalues i = 2006(1)2018 {
    cd `data'/AA
    use AA_PatientActivity`i'.dta, clear
    cd `save_data'
    fmerge m:1 PATIENT_ID using initial_script_1018.dta
    keep if _merge==3
    drop _merge
    * replace: without it a second run of this script stops with "file already exists".
    * The yearly subsets are written to the save directory as temp<year>.dta; the
    * previously declared tempfile macro was never used and has been removed.
    save temp`i', replace
}

use temp2006, clear
forvalues i = 2007(1)2018 {
    append using temp`i'
}
*save allAA_initial.dta

* keep only the rows from each patient's minimum MONTH_ID
by PATIENT_ID, s: egen earliest = min(MONTH_ID)
keep if MONTH_ID==earliest
generate type = "AA"

save earliest_AA.dta, replace

* Find each cohort patient's earliest AD activity month.
* For every yearly AD patient-activity file (2006-2018), keep only the rows whose
* PATIENT_ID is in initial_script_1018.dta, save the yearly subset, then stack the years.
* fmerge is not a built-in Stata command (presumably from the ftools package -- confirm it is installed).
forvalues i = 2006(1)2018 {
    cd `data'/AD
    use AD_PatientActivity`i'.dta, clear
    cd `save_data'
    fmerge m:1 PATIENT_ID using initial_script_1018.dta
    keep if _merge==3
    drop _merge
    * replace: without it a second run of this script stops with "file already exists".
    * The yearly subsets are written to the save directory as tempAD<year>.dta; the
    * previously declared (and differently named) tempfile macro was never used and has been removed.
    save tempAD`i', replace
}

use tempAD2006, clear
forvalues i = 2007(1)2018 {
    append using tempAD`i'
}

* keep only the rows from each patient's minimum MONTH_ID
by PATIENT_ID, s: egen earliest = min(MONTH_ID)
keep if MONTH_ID==earliest
generate type = "AD"

save earliest_AD.dta, replace

* Pool the AD and AA earliest-month records and keep, per patient, only the rows
* belonging to the overall earliest month across both files.
use earliest_AD.dta, clear
append using earliest_AA.dta
bysort PATIENT_ID: egen earliest2 = min(earliest)
keep if earliest == earliest2
drop earliest2

* year of the first script and the patient's age in that year
* (MONTH_ID/100 presumably drops the month from a YYYYMM value -- confirm)
gen year = floor(MONTH_ID/100)
gen age = year - PAT_BRTH

* first script must fall at ages 10 through 18
keep if inrange(age, 10, 18)

* first script must be in 2011 or later
drop if year < 2011

* keep the merge keys and descriptors only
keep PATIENT_ID MONTH_ID year age type
gen birth = year - age

* numeric flag used as a merge key later: 1 for "AA" rows, 0 otherwise
gen type2 = (type == "AA")

* save earliest scripts
save earliest_1018.dta, replace

* Locate the AA prescription rows behind each patient's first script: the patient's
* ZIP3 and the prescribing provider, one output file per year.
* fmerge is not a built-in Stata command (presumably from the ftools package).
forvalues yr = 2011/2018 {
    cd `data'/AA
    use AA_FactRx`yr'.dta, clear

    * AA rows carry type2 == 1 in earliest_1018.dta
    gen type2 = 1

    cd `save_data'
    fmerge m:1 PATIENT_ID MONTH_ID type2 using earliest_1018.dta

    * matched rows only, excluding RX_TYP_CD == 1 (meaning of code 1 not shown here -- confirm)
    keep if _merge == 3 & RX_TYP_CD != 1

    keep PATIENT_ID PAT_ZIP3 PROVIDER_ID year
    save AA_instrZ_`yr'.dta, replace
}

* stack the yearly files into one AA file
use AA_instrZ_2011.dta, clear
forvalues yr = 2012/2018 {
    append using AA_instrZ_`yr'.dta
}
save AA_instrZ.dta, replace

* Locate the AD prescription rows behind each patient's first script: the patient's
* ZIP3 and the prescribing provider, one output file per year.
* fmerge is not a built-in Stata command (presumably from the ftools package).
forvalues i = 2011(1)2018 {
    cd `data'/AD
    use AD_FactRx`i'.dta, clear

    * earliest_1018.dta defines type2 as (type=="AA"), so AD rows carry type2 == 0.
    * This was previously set to 1 (copied from the AA loop), which matched AD
    * prescriptions against AA first-script records instead of AD ones.
    generate type2=0

    cd `save_data'
    fmerge m:1 PATIENT_ID MONTH_ID type2 using earliest_1018.dta
    keep if _merge==3
    drop _merge

    * exclude RX_TYP_CD == 1 (meaning of code 1 not shown here -- confirm)
    drop if RX_TYP_CD==1
    keep PATIENT_ID PAT_ZIP3 PROVIDER_ID year
    save AD_instrZ_`i'.dta, replace
}

* stack the yearly files into one AD file
use AD_instrZ_2011.dta, clear
forvalues i = 2012(1)2018 {
    append using AD_instrZ_`i'
}
save AD_instrZ.dta, replace

*stack the files
* Combine the AA and AD first-prescription records. Rows appended from
* AD_instrZ.dta arrive with an empty type and are labelled "AD".
use AA_instrZ.dta, clear
generate type = "AA"
append using AD_instrZ.dta
replace type = "AD" if type==""
* PAT_ZIP is a variable-name abbreviation (presumably resolves to PAT_ZIP3 -- confirm)
drop if PAT_ZIP==.
* exact duplicates are judged on all variables, type included
duplicates drop

*patient only has records within one year
* keep one row per patient-provider pair (PATIENT / PROVIDER are abbreviations,
* presumably of PATIENT_ID / PROVIDER_ID -- confirm)
* NOTE(review): no tie-breaking variable is given, so which row of a pair survives
* is not pinned down by this code
by PATIENT PROVIDER, s: keep if _n==1
drop type

* NOTE(review): records is created and dropped immediately; the count is never used
by PATIENT, s: generate records = _N
drop records

* attach provider attributes from the AA provider file; providers that appear only
* in the using file (_merge==2) are discarded
cd `data'/AA
merge m:1 PROVIDER_ID using AA_Provider.dta
drop if _merge==2
drop _merge

* same for the AD provider file; the update option lets using-file values fill in
* missing values in the data in memory for matched providers
cd `data'/AD
merge m:1 PROVIDER_ID using AD_Provider.dta, update
drop if _merge==2
drop _merge

* restrict providers: no veterinarians, PROVIDER_TYP code 1 only (meaning of
* code 1 not shown here -- confirm), and a non-missing npi
drop if PRI_SPCL_DESC=="VETERINARIAN"
keep if PROVIDER_TYP==1
drop if npi==.

* keep the variables positioned from PATIENT_ID through year (dataset order) plus npi,
* then remove the provider and patient identifiers before saving
* (the output name suggests a file without patient IDs -- presumably intentional)
keep PATIENT_ID-year npi
drop PROVIDER_ID PATIENT_ID

cd `save_data'
save all_instrZ_noPAT.dta, replace


